# Dickstein, Ho, and Mark (2023)
# This script creates data used in table 4 of the paper

# Preliminaries
setwd("../../../../library")
source("PreliminariesCode.R")
library(readr)
library(knitr)
library(readxl)
library(numDeriv)
library(stringr)
library(yaml)
library(haven)
try(library(Hmisc))

main <- function() {
  data <- read_csv(paste0(project_folder, "/analysis/tablesandfigures/build/paper_sumstats/data/harmonized_subs.csv"))
  sg_data <- data %>% filter(sg_vs_indiv == "Small Group")
  indiv_data <- data %>% filter(sg_vs_indiv == "Individual")
  
  get_ctft_params(data = indiv_data, "lowrisk_simplemh_censor0.2", "main")
  get_ctft_params(data = indiv_data, "lowrisk_simplemh_censor0.5", "main")
  
  get_ctft_params(data = sg_data, "lowrisk_simplemh_censor0.2", "switcher")
  get_ctft_params(data = sg_data, "lowrisk_simplemh_censor0.5", "switcher")
}


get_ctft_params <- function(data, run, type) {
  # Load the estimates
    covar_names_csv <- read_csv(paste0(project_folder, "/analysis/estimationcode/specs/", run, "_", type, "/output", "/covar_names.csv"))
    covar_lens_csv <- read_csv(paste0(project_folder, "/analysis/estimationcode/specs/", run, "_", type, "/output", "/covar_lens.csv"))
    ests <- read_csv(paste0(project_folder, "/analysis/estimationcode/specs/", run, "_", type, "/output", "/se.csv"))
    varcov_csv <- read_csv(paste0(project_folder, "/analysis/estimationcode/specs/", run, "_", type, "/output", "/varcov.csv"))
  
  theta <- ests$solution  
  covar_names <- covar_names_csv[['x']]
  covar_lens <- covar_lens_csv[['x']]
  cost_covar_lens <- covar_lens[1:2]
  
  # Back out individual covar names and beta params from covar_lanes
  W1_names <- covar_names[1:covar_lens[1]]
  W2_names <- covar_names[(1+covar_lens[1]):(covar_lens[1]+covar_lens[2])]
  W3_names <- covar_names[(1+covar_lens[1]+covar_lens[2]):(covar_lens[1]+covar_lens[2]+covar_lens[3])]
  
  beta1 <- theta[1:covar_lens[1]]
  beta2 <- theta[(1+covar_lens[1]):(covar_lens[1]+covar_lens[2])]
  beta3 <- theta[(1+covar_lens[1]+covar_lens[2]):(covar_lens[1]+covar_lens[2]+covar_lens[3])]
  
  sum_low_risk_threshold <- 0.2575
  data <- data %>%
    mutate(
      orig_subs_id = subscriberid,
      subscriberid_year = paste0(subscriberid, "_", year),
      low_sum_risk =  if_else(sum_concurrent_risk <= sum_low_risk_threshold, 1, 0),
      x_av = best_guess_av/100,
      # Family type
      nokids = 1 - withkids,
      single = 1 - married,
      single_membered = single * nokids,
      married_withkids = if_else(withkids * married == 1, 1, 0),
      single_withkids = if_else(single * married == 1, 1, 0),
      cons = 1)
  
  W1_mat <- as.matrix(data[W1_names])
  W2_mat <- as.matrix(data[W2_names])
  W3_mat <- as.matrix(data[W3_names])
  
  # Construct parameter vectors
  data['alpha_i'] <- exp(W1_mat %*% beta1)
  data['omega_i'] <- exp(W2_mat %*% beta2)
  data['psi_i']   <- exp(W3_mat %*% beta3)
  
  set.seed(4197169)
  data <- data %>%
    mutate(
      lambda = rexp(n(), rate=alpha_i),
      wlambda=lambda*(lambda < 89.37073) + 89.37073*(lambda > 89.37073),
      simcost=(x_av+omega_i*x_av^2)*lambda,
      wsimcost=simcost*(simcost < 89.37073) + 89.37073*(simcost > 89.37073))
  output_path <- paste0(project_folder, "/analysis/tablesandfigures/build/ctftparams/specs/", run, "_", type)
  
  # Prepare file path
  dir.create(output_path, recursive = T)
  f <- list.files(output_path, include.dirs = F, full.names = T)
  file.remove(f)
  write_csv(data, paste0(output_path, "/ctftparams.csv"))
}

main()
